# -*- coding: utf-8 -*-
import sys
reload(sys)
# Python 2's default string encoding is ASCII; force it to UTF-8 instead.
sys.setdefaultencoding("utf-8")
import scrapy
import re
from dangdang.items import DangdangItem
from scrapy.http import Request
import scrapy
from scrapy.selector import Selector

class DangdangsSpider(scrapy.Spider):
    """Spider for the book ranking pages under bang.dangdang.com/books/fivestars.

    Crawling starts at page 1 of the listing (``start_urls``). Every parsed
    page yields one ``DangdangItem`` per ranked entry and schedules requests
    for listing pages 2 through 24, all handled by :meth:`parse`.
    """

    name = "dangdangs"
    allowed_domains = ["dangdang.com"]
    start_urls = ['http://bang.dangdang.com/books/fivestars/01.00.00.00.00.00-recent30-0-0-1-1']

    def parse(self, response):
        """Extract ranked entries from one listing page and queue the other pages.

        Args:
            response: The downloaded listing page.

        Yields:
            ``DangdangItem`` objects with ``title`` and ``num`` set, followed
            by ``Request`` objects for listing pages 2 through 24.
        """
        nums = response.xpath(
            '//div[@class="bang_list_box"]/ul/li/div[1]/text()').extract()
        titles = response.xpath('//div[@class="name"]/a/text()').extract()

        # zip() pairs the two lists positionally and stops at the shorter one,
        # so a page where the counts differ no longer raises IndexError.
        for title, num in zip(titles, nums):
            # Build a fresh item for every row: re-yielding one shared,
            # mutated item makes every yielded reference point at the same
            # object, so later rows overwrite earlier ones for any consumer
            # that still holds it.
            item = DangdangItem()
            item['title'] = title
            # Drop the last character of the rank text (presumably a trailing
            # "." as in "1." -- confirm against the page markup).
            item['num'] = num[:-1]
            yield item

        # Every parsed page re-emits requests for pages 2..24; this relies on
        # Scrapy's default duplicate-request filter to drop the repeats.
        for page in range(2, 25):
            url = "http://bang.dangdang.com/books/fivestars/01.00.00.00.00.00-recent30-0-0-1-{}".format(page)
            yield Request(url, callback=self.parse)



